#---------------------------------------------------------------------
# Script for replicating results on competition for antidumping duties
# in Egerod and Junk (2022)
#
# The output of this script will correspond to Figure 5 in the article
# 
#
# This is a small replication of Egerod and Justesen (2021). See more detailed results in Egerod and Justesen (2021)
#--------------------------------------------------------------------

# set working directory to script location

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))


#packages
library(sp); library(spdep);  library(splm); library(igraph)
library(stringr); library(reshape); library(reshape2); library(plyr); library(dplyr); 
library(plm); library(readr); library(ggplot2); library(grid); library(gridExtra)
library(erer); library(vcd); library(oddsratio); library(stargazer)
library(lme4); library(arm)

#load data
no_miss <- read_csv("AntidumpingData.csv")


########################################################################################
# generate spatial weights matrix

spat.year <- t(combn(no_miss$year, m = 2))
spat.case <- t(combn(no_miss$CASE_ID, m = 2))
spat.product <- t(combn(no_miss$PRODUCT, m = 2))
spat.country <- t(combn(no_miss$AD_CTY_NAME, m = 2))

w.id <- t(combn(no_miss$cleanname, m=2))

spat <- data.frame(spat.year, spat.case, spat.product, spat.country, w.id)
names(spat) <- c("year_i", "year_j", "case_i", "case_j", "product_i", "product_j", "country_i", "country_j", "name_i", "name_j")

spat$case_i <- as.character(spat$case_i)
spat$case_j <- as.character(spat$case_j)
spat$product_i <- as.character(spat$product_i)
spat$product_j <- as.character(spat$product_j)
spat$country_i <- as.character(spat$country_i)
spat$country_j <- as.character(spat$country_j)
spat$name_i <- as.character(spat$name_i)
spat$name_j <- as.character(spat$name_j)

weight <- ifelse(spat$year_i == spat$year_j & # within the same year,
                   spat$case_i != spat$case_j & # not on the same petition,
                   spat$product_i == spat$product_j & # for the same product,
                   spat$country_i == spat$country_j &  #from the same home-country,
                   spat$name_i != spat$name_j, 1, 0) # not the same firm.

w.id <- row.names(no_miss)
w.id <- t(combn(w.id, m = 2))

w.edge <- data.frame(w.id, as.numeric(as.character(weight)))
w.edge[,1] <- as.character(w.edge[,1])
w.edge[,2]<- as.character(w.edge[,2])
names(w.edge)[3]<- "weight"

w.mat <- as.matrix(w.edge)
w.mat[,1] <- as.character(w.mat[,1])
w.mat[,2]<- as.character(w.mat[,2])

w.mat2 <- graph.edgelist(w.mat[,1:2], directed = F)
E(w.mat2)$weight <- as.numeric(as.character(w.edge$weight))

panel.weight <- get.adjacency(w.mat2, attr='weight')
panel.weight <- as.matrix(panel.weight)
panel.weight <- mat2listw(panel.weight)

#---------------------------------------------------------
# generate w matrix for use in spatially lagging manually

slx.weight <- get.adjacency(w.mat2, attr='weight')
slx.weight <- as.matrix(slx.weight)

#----------------------------------------------------------
# Generate covariates
no_miss$mobility <- no_miss$fixed_assets / no_miss$total_assets
no_miss$log_rev <- log(no_miss$revenue)
no_miss$log_tax <- log(no_miss$taxation+ 119067.3)
no_miss$log_cap <- log(no_miss$capital+.5)
no_miss$log_assets <- log(no_miss$total_assets)

# spatially lagged Xs
no_miss$SLX <- slx.weight %*% as.vector(no_miss$mobility)

no_miss$SLX2 <- slx.weight %*% as.vector(log(no_miss$revenue))
no_miss$SLX3 <- slx.weight %*% as.vector(log(no_miss$total_assets))
no_miss$SLX4 <- slx.weight %*% as.vector(log(no_miss$taxation + 119067.3))
no_miss$SLX5 <- slx.weight %*% as.vector(log(no_miss$capital+.5))

#---------------------------------------------------------
# baseline model

slx.mod <- lagsarlm(log(wto_final+.5) ~ I(fixed_assets/total_assets) + SLX + log(revenue) + log(total_assets) + log(taxation + 119067.3) + log(capital+.5)+ SLX2 + SLX3 + SLX4 + SLX5 + AD_CTY_NAME + factor(year),
                    data = no_miss, panel.weight, method="eigen",  zero.policy=TRUE, tol.solve=1.0e-11, control=list(fdHess=TRUE))
summary(slx.mod)

#bootstrap
set.seed(182)
slx.imp <- impacts(slx.mod, listw = panel.weight, R = 500, zstats = T)
summary(slx.imp)

res_dir <- as.data.frame(t(apply(slx.imp$sres$direct[,c(1,3:6)],2,quantile,
                 probs = c(0.025, 0.05, 0.5, 0.95,0.975))))
res_dir$spec <- "Direct"


res_indir <- as.data.frame(t(apply(slx.imp$sres$indirect[,c(1,3:6)],2,quantile,
                   probs = c(0.025, 0.05, 0.5, 0.95,0.975))))
res_indir$spec <- "Indirect"

res <- rbind(res_dir, res_indir)
names(res) <- c("lwr95", "lwr90", "pe", "upr90", "upr95", "spec")

res$pred <- c("Specificity", "Revenue", "Total Assets",
              "Taxation", "Capital")
res$pred_num <- c(2,5,3,4,1)

#------------------------------------------------------------------------------
# product FE
prod2 <- lagsarlm(log(wto_final +.5) ~ I(fixed_assets/total_assets) + SLX + log(revenue)+
                    log(total_assets) + log(taxation+ 119067.3) + log(capital+.5) + SLX2 + SLX3 + SLX4 + SLX5 + prod2 + AD_CTY_NAME + factor(year),
                  data = no_miss, panel.weight, method="eigen",  zero.policy=TRUE, tol.solve=1.0e-11, control=list(fdHess=TRUE))
summary(prod2)

rho_parm <- c(round(slx.mod$rho, digits = 3 ),
round(prod2$rho, digits = 3 ))

rho_parm_se <- c(round(slx.mod$rho.se, digits = 4 ),
              round(prod2$rho.se, digits = 4 ))

# regression table
stargazer(slx.mod, prod2, omit = c("prod2", "year", "AD", "SLX"),
          covariate.labels = c("Asset Specificity", "Revenue (logged)",
                               "Total Assets (logged)", "Taxation (logged)",
                               "Capital (logged)"),
          title = "Linear SAR Coefficients:  Antidumping-Seeking Firms",
          dep.var.labels = "Duty Size (logged)",
          add.lines = list(c("Spatial Parameter", rho_parm),
                           c(" ", rho_parm_se)),
          out = "antidumping_regressions.html",
          type = "html")

#bootstrap
set.seed(182)
prod.boots <- impacts(prod2, listw = panel.weight, R = 500, zstats = T)
summary(prod.boots)

res_fe_dir <- as.data.frame(t(apply(prod.boots$sres$direct[,c(1,3:6)],2,quantile,
                                 probs = c(0.025, 0.05, 0.5, 0.95,0.975))))
res_fe_dir$spec <- "Direct"


res_fe_indir <- as.data.frame(t(apply(prod.boots$sres$indirect[,c(1,3:6)],2,quantile,
                                   probs = c(0.025, 0.05, 0.5, 0.95,0.975))))
res_fe_indir$spec <- "Indirect"

res_fe <- rbind(res_fe_dir, res_fe_indir)
names(res_fe) <- c("lwr95", "lwr90", "pe", "upr90", "upr95", "spec")

res_fe$pred <- c("Specificity", "Revenue", "Total Assets",
              "Taxation", "Capital")
res_fe$pred_num <- c(2,5,3,4,1)


#-------------------------------------------------
# plot only with revenue


r_res<- res%>%filter(pred == "Revenue")
r_res_fe <- filter(res_fe, pred == "Revenue")

r_res <- rbind(r_res, r_res_fe)
r_res$pred <- c("Revenue\nNo Product FE", "Revenue\nNo Product FE",
                "Revenue\nWith Product FE","Revenue\nWith Product FE")

r_res$pred_num <-c(2,2,1,1) 
r_res$spec <- c("A: Direct Effects", "B: Indirect Effects",
                "A: Direct Effects", "B: Indirect Effects")

# for presentational purposed, we limit the upper end of the CI on the indirect effects
r_res <- r_res %>% 
  mutate(LCL_l = ifelse(upr95 > 3, 3, upr95), 
         UCL_l = ifelse(upr90 > 3, 3, upr90),
         lim_help = ifelse(upr95 > 3, 3, NA))


ann_text <- data.frame(pe = r_res$pe, pred = r_res$pred, pred_num = r_res$pred_num+0.09, 
                       lab = c(rho_parm[1],NA,rho_parm[2], NA), lab.se = c(rho_parm_se[1],NA,rho_parm_se[2], NA),
                       spec = factor(r_res$spec))
ann_text2 <- data.frame(pe = r_res$pe, pred = r_res$pred, pred_num = r_res$pred_num+0.05, 
                        lab = c(rho_parm[1],NA,rho_parm[2], NA), lab.se = c(rho_parm_se[1],NA,rho_parm_se[2], NA),
                        spec = factor(r_res$spec))

ann_text$cors <- paste(c("rho==", NA, "rho==", NA), c(ann_text$lab),
                       sep = "")
ann_text$cors[c(2,4)] <- NA
ann_text$lab.se <- format(ann_text$lab.se, scientific=F)


ann_text2$SEs <- paste("(", ann_text$lab.se, ")",
                       sep = "")
ann_text2$SEs[c(2,4)] <- NA


rev_p <- ggplot(r_res, aes(x = pe, y = pred_num)) +
  geom_point() +
  geom_errorbarh(aes(xmin = lwr95, xmax = LCL_l), # change this to get the correct (not limited) CIs
                 colour = "grey",
                 height = 0.05) +
  geom_errorbarh(aes(xmin = lwr90, xmax = UCL_l), 
                 height = 0) +
  theme_bw() +
  facet_wrap(~ spec, scales = "free_x") +
  geom_vline(xintercept = 0, lty = 3) +
  labs(x = "Estimate", y = NULL)+
  geom_segment(aes(x = pe, xend = lim_help, y = pred_num, yend = pred_num), 
                arrow = arrow(length = unit(0.15, "cm")))  + 
  scale_y_continuous(limits = c(0.75,2.25),
                     breaks = c(1,2), labels = as.character(unique(r_res$pred))) +
  geom_text(data = ann_text, aes(label = cors), size = 3, parse = T) +
  geom_text(data = ann_text2, aes(label = SEs), size = 3) 

rev_p

ggsave(plot = rev_p,
       filename = "ADResults.jpg", device = "jpg",
       width = 6.99, height = 4.99) 
